{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Streetcar Delay Prediction - XGBoost - REFACTORED\n",
    "\n",
    "GOAL: predict streetcar delays using XGBoost (for comparison with deep learning model)\n",
    "\n",
    "Refactored to look at delays by hour by day by route by direction\n",
    "\n",
    "Source dataset: https://open.toronto.ca/dataset/ttc-streetcar-delay-data/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Links to key parts of the notebook <a name='linkanchor' />\n",
    "<a href=#ingestdash>Ingest data</a>\n",
    "\n",
    "<a href=#definecategories>Define feature categories</a>\n",
    "\n",
    "<a href=#bookmark>Deal with missing values</a>\n",
    "\n",
    "<a href=#modelfit>Define and fit model</a>\n",
    "\n",
    "<a href=#reload>Reload saved model and weights</a>\n",
    "\n",
    "<a href=#confusionmatrix>Confusion matrix</a>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Common imports and global variable definitions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# common imports\n",
    "import zipfile\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import time\n",
    "import seaborn as sns\n",
    "from xgboost import XGBClassifier\n",
    "# import datetime, timedelta\n",
    "import datetime\n",
    "from datetime import datetime, timedelta\n",
    "from datetime import date\n",
    "from dateutil import relativedelta\n",
    "from io import StringIO\n",
    "import pandas as pd\n",
    "import pickle\n",
    "from pickle import dump\n",
    "from pickle import load\n",
    "from sklearn.base import BaseEstimator\n",
    "from sklearn.base import TransformerMixin\n",
    "# DSX code to import uploaded documents\n",
    "from io import StringIO\n",
    "import requests\n",
    "import json\n",
    "from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler\n",
    "from sklearn.model_selection import train_test_split\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "import os\n",
    "import yaml\n",
    "import math\n",
    "import sys\n",
    "from subprocess import check_output\n",
    "from IPython.display import display\n",
    "#model libraries\n",
    "from tensorflow.keras.layers import Input, Dropout, Dense, BatchNormalization, Activation, concatenate, GRU, Embedding, Flatten, BatchNormalization\n",
    "from tensorflow.keras.models import Model\n",
    "from tensorflow.keras.callbacks import ModelCheckpoint, Callback, EarlyStopping\n",
    "from tensorflow.keras import regularizers\n",
    "from tensorflow.keras.layers import BatchNormalization\n",
    "#from tf.keras.layers.normalization import BatchNormalization\n",
    "from tensorflow.keras.regularizers import l2\n",
    "from tensorflow.keras.optimizers import Adam\n",
    "from tensorflow.keras.optimizers import SGD\n",
    "from tensorflow.keras import backend as K\n",
    "# from tensorflow.keras.utils.vis_utils import plot_model\n",
    "from tensorflow.keras.utils import plot_model\n",
    "from tensorflow.keras.preprocessing.text import Tokenizer\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "from tensorflow.keras.models import load_model\n",
    "#import datetime\n",
    "#from datetime import date\n",
    "from sklearn import metrics\n",
    "# import pipeline libraries\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics import average_precision_score\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.base import TransformerMixin\n",
    "from sklearn.base import BaseEstimator\n",
    "from custom_classes import encode_categorical\n",
    "from custom_classes import prep_for_keras_input\n",
    "from custom_classes import fill_empty\n",
    "from custom_classes import encode_text\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2.0.0'"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "tf.__version__ "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "current directory is: C:\\personal\\manning\\deep_learning_for_structured_data\\notebooks\n",
      "path_to_yaml C:\\personal\\manning\\deep_learning_for_structured_data\\notebooks\\streetcar_model_training_config.yml\n"
     ]
    }
   ],
   "source": [
    "# load config file\n",
    "# the config file is expected in the current working directory\n",
    "current_path = os.getcwd()\n",
    "print('current directory is: ' + current_path)\n",
    "\n",
    "path_to_yaml = os.path.join(current_path, 'streetcar_model_training_config.yml')\n",
    "print('path_to_yaml ' + path_to_yaml)\n",
    "try:\n",
    "    with open(path_to_yaml, 'r') as c_file:\n",
    "        config = yaml.safe_load(c_file)\n",
    "except Exception as e:\n",
    "    # config is read by the cells that follow, so report the cause and stop here\n",
    "    # rather than failing later with a NameError on config\n",
    "    print('Error reading the config file:', e)\n",
    "    raise\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "date today 2020-04-05 15:17:22.971863\n",
      "start date 2014-01-01\n",
      "end date 2019-02-28\n"
     ]
    }
   ],
   "source": [
    "# load parameters from the config dictionary into module-level variables\n",
    "\n",
    "# shorthand for the config sections read in this cell (removed again at the end)\n",
    "_test_parms = config['test_parms']\n",
    "_general = config['general']\n",
    "_hyper = config['hyperparameters']\n",
    "\n",
    "# dataset split\n",
    "testproportion = _test_parms['testproportion'] # proportion of data reserved for test set\n",
    "trainproportion = _test_parms['trainproportion'] # proportion of non-test data dedicated to training (vs. validation)\n",
    "\n",
    "# general switches\n",
    "verboseout = _general['verboseout']\n",
    "includetext = _general['includetext']\n",
    "presaved = _general['presaved']\n",
    "savemodel = _general['savemodel']\n",
    "picklemodel = _general['picklemodel']\n",
    "\n",
    "# text limits\n",
    "hctextmax = _general['hctextmax']\n",
    "maxwords = _general['maxwords']\n",
    "textmax = _general['textmax']\n",
    "\n",
    "# target definition\n",
    "targetthresh = _general['targetthresh']\n",
    "targetcontinuous = _general['targetcontinuous']\n",
    "\n",
    "# time of day thresholds: hour ranges keyed by the name of the time slot\n",
    "time_of_day = {\n",
    "    'overnight': {'start': 0, 'end': 5},\n",
    "    'morning_rush': {'start': 5, 'end': 10},\n",
    "    'midday': {'start': 10, 'end': 15},\n",
    "    'aft_rush': {'start': 15, 'end': 19},\n",
    "    'evening': {'start': 19, 'end': 24},\n",
    "}\n",
    "\n",
    "# thresholds and class weights\n",
    "emptythresh = _general['emptythresh']\n",
    "zero_weight = _general['zero_weight']\n",
    "one_weight = _general['one_weight']\n",
    "one_weight_offset = _general['one_weight_offset']\n",
    "patience_threshold = _general['patience_threshold']\n",
    "\n",
    "# modifier for saved model elements\n",
    "modifier = _general['modifier']\n",
    "\n",
    "# control whether training controlled by early stop\n",
    "early_stop = True\n",
    "\n",
    "# default hyperparameter values\n",
    "learning_rate = _hyper['learning_rate']\n",
    "dropout_rate = _hyper['dropout_rate']\n",
    "l2_lambda = _hyper['l2_lambda']\n",
    "loss_func = _hyper['loss_func']\n",
    "output_activation = _hyper['output_activation']\n",
    "batch_size = _hyper['batch_size']\n",
    "epochs = _hyper['epochs']\n",
    "\n",
    "# date values\n",
    "date_today = datetime.now()\n",
    "print('date today', date_today)\n",
    "start_date = date(_general['start_year'], _general['start_month'], _general['start_day'])\n",
    "print('start date', start_date)\n",
    "end_date = date(_general['end_year'], _general['end_month'], _general['end_day'])\n",
    "print('end date', end_date)\n",
    "\n",
    "# pickled original dataset and post-preprocessing dataset\n",
    "pickled_data_file = _general['pickled_data_file']\n",
    "pickled_dataframe = _general['pickled_dataframe']\n",
    "routedirection_file = _general['route_direction_file']\n",
    "\n",
    "# experiment parameter\n",
    "current_experiment = _test_parms['current_experiment']\n",
    "\n",
    "# drop the shorthand names so the cell defines exactly the parameters above\n",
    "del _test_parms, _general, _hyper"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Helper functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# time_of_day = {'overnight':{'start':0,'end':5},'morning_rush':{'start':5,'end':10},\n",
    "#              'midday':{'start':10,'end':15},'aft_rush':{'start':15,'end':19},'evening':{'start':19,'end':23}}\n",
    "\n",
    "\n",
    "def get_time(hour):\n",
    "    for tod in time_of_day:\n",
    "        if (hour >= time_of_day[tod]['start']) and (hour < time_of_day[tod]['end']):\n",
    "            tod_out = tod\n",
    "    return(tod_out)\n",
    "\n",
    "def weekend_time(day, tod):\n",
    "    if (day=='Saturday') or (day=='Sunday'):\n",
    "        return('w'+tod)\n",
    "    else:\n",
    "        return(tod)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get the paths required\n",
    "\n",
    "def get_path():\n",
    "    '''get the path for data files'''\n",
    "    rawpath = os.getcwd()\n",
    "    # data is in a directory called \"data\" that is a sibling to the directory containing the notebook\n",
    "    path = os.path.abspath(os.path.join(rawpath, '..', 'data'))\n",
    "    return(path)\n",
    "\n",
    "def get_pipeline_path():\n",
    "    '''get the path for data files'''\n",
    "    rawpath = os.getcwd()\n",
    "    # data is in a directory called \"data\" that is a sibling to the directory containing the notebook\n",
    "    path = os.path.abspath(os.path.join(rawpath, '..', 'pipelines'))\n",
    "    return(path)\n",
    "\n",
    "def get_model_path():\n",
    "    '''get the path for data files'''\n",
    "    rawpath = os.getcwd()\n",
    "    # data is in a directory called \"data\" that is a sibling to the directory containing the notebook\n",
    "    path = os.path.abspath(os.path.join(rawpath, '..', 'models'))\n",
    "    return(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "def set_experiment_parameters(experiment_number, count_no_delay, count_delay):\n",
    "    ''' set the appropriate parameters for the experiment '''\n",
    "    print(\"setting parameters for experiment \", experiment_number)\n",
    "    # default settings for early stopping:\n",
    "    es_monitor = \"val_loss\"\n",
    "    es_mode = \"min\"\n",
    "    if experiment_number == 1:\n",
    "        #\n",
    "        early_stop = False\n",
    "        #\n",
    "        one_weight = 1.0\n",
    "        #\n",
    "        epochs = 10\n",
    "    elif experiment_number == 2:\n",
    "        #\n",
    "        early_stop = False\n",
    "        #\n",
    "        one_weight = 1.0\n",
    "        #\n",
    "        epochs = 50\n",
    "    elif experiment_number == 3:\n",
    "        #\n",
    "        early_stop = False\n",
    "        #\n",
    "        one_weight = (count_no_delay/count_delay) + one_weight_offset\n",
    "        #\n",
    "        epochs = 50\n",
    "    elif experiment_number == 4:\n",
    "        #\n",
    "        early_stop = True\n",
    "        es_monitor = \"val_loss\"\n",
    "        es_mode = \"min\"\n",
    "        #\n",
    "        one_weight = (count_no_delay/count_delay) + one_weight_offset\n",
    "        #\n",
    "        epochs = 50\n",
    "    elif experiment_number == 5:\n",
    "        #\n",
    "        early_stop = True\n",
    "        if sys.version_info >= (3,7):\n",
    "            es_monitor=\"val_accuracy\"\n",
    "        else:\n",
    "            es_monitor = \"val_acc\"\n",
    "        es_mode = \"max\"\n",
    "        #\n",
    "        one_weight = (count_no_delay/count_delay) + one_weight_offset\n",
    "        #\n",
    "        epochs = 50\n",
    "    else:\n",
    "        early_stop = True\n",
    "    return(early_stop, one_weight, epochs,es_monitor,es_mode)\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ingest data and create refactored dataframe\n",
    "- Ingest data for route information and delay information\n",
    "- Create refactored dataframe with one row per route / direction / timeslot combination\n",
    "\n",
    "<a name='ingestdash' />\n",
    "<a href=#linkanchor>Back to link list</a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load list of valid routes and directions into dataframe\n",
    "def ingest_data(path):\n",
    "    '''Load the route/direction list and the pickled delay dataframe.\n",
    "\n",
    "    Args:\n",
    "        path: directory that contains both input files\n",
    "    Returns:\n",
    "        tuple (routedirection_frame, merged_data): the CSV named by the module-level\n",
    "        routedirection_file and the pickle named by the module-level pickled_dataframe\n",
    "    '''\n",
    "    routedirection_frame = pd.read_csv(os.path.join(path, routedirection_file))\n",
    "    # NOTE(review): unpickling can execute arbitrary code - only load a pickle\n",
    "    # that comes from a trusted source\n",
    "    merged_data = pd.read_pickle(os.path.join(path, pickled_dataframe))\n",
    "    return(routedirection_frame, merged_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# add derived columns to merged_data dataframe\n",
    "def prep_merged_data(merged_data):\n",
    "    '''Add date-part, time-of-day and target columns to the delay dataframe.\n",
    "\n",
    "    The columns are written to the dataframe passed in, which is also returned:\n",
    "    year, month and daym come from Report Date, hour from Report Date Time,\n",
    "    time_of_day from get_time(hour) adjusted by weekend_time using Day, and\n",
    "    target is Min Delay itself when targetcontinuous is set, otherwise 1 where\n",
    "    Min Delay >= targetthresh and 0 elsewhere.\n",
    "    '''\n",
    "    # date parts\n",
    "    report_dates = pd.DatetimeIndex(merged_data['Report Date'])\n",
    "    merged_data['year'] = report_dates.year\n",
    "    merged_data['month'] = report_dates.month\n",
    "    merged_data['daym'] = report_dates.day\n",
    "    merged_data['hour'] = pd.DatetimeIndex(merged_data['Report Date Time']).hour\n",
    "    # time slot of the hour, then the weekend variant of the slot\n",
    "    merged_data['time_of_day'] = merged_data['hour'].apply(get_time)\n",
    "    merged_data['time_of_day'] = merged_data.apply(\n",
    "        lambda row: weekend_time(row['Day'], row['time_of_day']), axis=1)\n",
    "    # target: raw delay or thresholded flag\n",
    "    delay_minutes = merged_data['Min Delay']\n",
    "    if not targetcontinuous:\n",
    "        merged_data['target'] = np.where(delay_minutes >= targetthresh, 1, 0)\n",
    "    else:\n",
    "        merged_data['target'] = delay_minutes\n",
    "    return(merged_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# generate dataframe containing rows for each timeslot for each route for each direction\n",
    "# and merge with the input merged_data dataframe to get a result of a sparse dataframe with the\n",
    "# timeslot / route / direction combinations where delays occurred\n",
    "def prep_sparse_df(routedirection_frame, merged_data):\n",
    "    '''Build the refactored dataframe: one row per date / route / direction / hour.\n",
    "\n",
    "    Every date from the module-level start_date to end_date is combined with every\n",
    "    row of routedirection_frame and every hour 0-23; the delays in merged_data are\n",
    "    then left-joined on Report Date, Route, Direction and hour.\n",
    "\n",
    "    Args:\n",
    "        routedirection_frame: dataframe with Route and Direction columns; a constant\n",
    "            count column is added to it in place to serve as the join key\n",
    "        merged_data: delay dataframe containing the columns dropped below; the\n",
    "            caller's dataframe is not modified\n",
    "    Returns:\n",
    "        dataframe in which timeslots without a joined delay have Min Delay 0.0, and\n",
    "        target is 1 where Min Delay is above 0 and 0 elsewhere\n",
    "    '''\n",
    "    # all three frames share a constant count column, so merging on it pairs\n",
    "    # every row of one frame with every row of the other\n",
    "    routedirection_frame['count'] = 0\n",
    "    print('routedirection')\n",
    "    display(routedirection_frame[:5])\n",
    "    # define a dataframe with a row for each date to be covered\n",
    "    days = pd.date_range(start_date, end_date, freq='D')\n",
    "    date_frame = pd.DataFrame({'date':days,'count':0})\n",
    "    print('date_frame')\n",
    "    display(date_frame[:5])\n",
    "    # define a dataframe with a row for each hour\n",
    "    hour_list = list(range(0,24))\n",
    "    hour_frame = pd.DataFrame({'hour':hour_list,'count':0})\n",
    "    print('hour_frame')\n",
    "    display(hour_frame[:5])\n",
    "    # merge date_frame and routedirection\n",
    "    result1 = pd.merge(date_frame, routedirection_frame, on='count', how='outer')\n",
    "    print('result1')\n",
    "    display(result1[:5])\n",
    "    # merge result1 with hour_frame\n",
    "    result2 = pd.merge(result1, hour_frame, on='count', how='outer')\n",
    "    result2 = result2.rename(columns={'date': 'Report Date'})\n",
    "    result2.Route = result2.Route.astype(str)\n",
    "    # segment the date\n",
    "    result2['year'] = pd.DatetimeIndex(result2['Report Date']).year\n",
    "    result2['month'] = pd.DatetimeIndex(result2['Report Date']).month\n",
    "    result2['daym'] = pd.DatetimeIndex(result2['Report Date']).day\n",
    "    # weekday number (Monday is 0)\n",
    "    result2['day'] = pd.DatetimeIndex(result2['Report Date']).weekday\n",
    "    print('result2')\n",
    "    display(result2[:5])\n",
    "    print('merged_data before')\n",
    "    display(merged_data[:5])\n",
    "    # drop extraneous columns from merged_data (drop returns a new dataframe)\n",
    "    merged_data = merged_data.drop(['Time',\n",
    "     'Report Date Time',\n",
    "     'year',\n",
    "     'month',\n",
    "     'daym',\n",
    "     'time_of_day','Min Gap','Location','Incident','Vehicle','target','Day'],axis=1)\n",
    "    print('merged_data after dropping extraneous columns')\n",
    "    display(merged_data[:5])\n",
    "    # join result2 and the trimmed merged_data; the left join keeps every timeslot\n",
    "    result3 = pd.merge(result2,merged_data ,how='left', on=['Report Date','Route','Direction','hour'])\n",
    "    # assign the filled column back: an in-place fillna on the selected column\n",
    "    # does not update result3 when pandas copy-on-write is active\n",
    "    result3['Min Delay'] = result3['Min Delay'].fillna(value=0.0)\n",
    "    # NOTE(review): the target here is any delay above 0 and does not use\n",
    "    # targetthresh - confirm this is intended\n",
    "    result3['target'] = np.where(result3['Min Delay'] > 0.0, 1, 0 )\n",
    "    print('result3')\n",
    "    display(result3[:5])\n",
    "    return(result3)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# REFACTORED DATAFRAME SHOULD HAVE THE FOLLOWING COLUMNS:\n",
    "# DAY - for every day in the history from Jan 1 2014 to July 31 2018\n",
    "# HOUR - for every hour of the day\n",
    "#  for 501, regular route 5:00 am - midnight; 301 overnight\n",
    "#   for 503: 7- 10:00 am; 4-7:00 pm\n",
    "# for 504 5:00 am - 2:00 am; 304 overnight\n",
    "# for 505 5:00 am - 1:00 am\n",
    "# for 506 5:00 am - 1:00 am; 306 overnight\n",
    "# for 509 5:00 am - 1:00 am\n",
    "# for 510 5:00 am - 2:00 am; 310 overnight\n",
    "# for 511 5:00 am - 1:00 am\n",
    "# for 512 5:00 am - 2:00 am\n",
    "# for 514 (Cherry street)\n",
    "# ROUTE\n",
    "# DIRECTION\n",
    "# DELAY - where this could be count OR duration OR binary\n",
    "\n",
    "# example of filling in values:\n",
    "# data['PriceDate'] =  pd.to_datetime(data['PriceDate'], format='%m/%d/%Y')\n",
    "# data = data.sort_values(by=['PriceDate'], ascending=[True])\n",
    "# data.set_index('PriceDate', inplace=True)\n",
    "# print (data)\n",
    "\n",
    "# data = data.resample('D').ffill().reset_index()\n",
    "# print (data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Master Prep Calls\n",
    "Contains calls to functions to load data, prep input dataframes, and create refactored dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "path is C:\\personal\\manning\\deep_learning_for_structured_data\\data\n",
      "shape of pre refactored dataset (61500, 17)\n",
      "routedirection\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Route</th>\n",
       "      <th>Direction</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>301</td>\n",
       "      <td>w</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>301</td>\n",
       "      <td>b</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>304</td>\n",
       "      <td>e</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>304</td>\n",
       "      <td>e</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Route Direction  count\n",
       "0    301         e      0\n",
       "1    301         w      0\n",
       "2    301         b      0\n",
       "3    304         e      0\n",
       "4    304         e      0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "date_frame\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2014-01-03</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2014-01-04</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-01-05</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        date  count\n",
       "0 2014-01-01      0\n",
       "1 2014-01-02      0\n",
       "2 2014-01-03      0\n",
       "3 2014-01-04      0\n",
       "4 2014-01-05      0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "hour_frame\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hour</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   hour  count\n",
       "0     0      0\n",
       "1     1      0\n",
       "2     2      0\n",
       "3     3      0\n",
       "4     4      0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "result1\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>count</th>\n",
       "      <th>Route</th>\n",
       "      <th>Direction</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>304</td>\n",
       "      <td>e</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>304</td>\n",
       "      <td>e</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        date  count  Route Direction\n",
       "0 2014-01-01      0    301         e\n",
       "1 2014-01-01      0    301         w\n",
       "2 2014-01-01      0    301         b\n",
       "3 2014-01-01      0    304         e\n",
       "4 2014-01-01      0    304         e"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "result2\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Report Date</th>\n",
       "      <th>count</th>\n",
       "      <th>Route</th>\n",
       "      <th>Direction</th>\n",
       "      <th>hour</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>daym</th>\n",
       "      <th>day</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>0</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>1</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>2</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>3</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>4</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Report Date  count Route Direction  hour  year  month  daym  day\n",
       "0  2014-01-01      0   301         e     0  2014      1     1    2\n",
       "1  2014-01-01      0   301         e     1  2014      1     1    2\n",
       "2  2014-01-01      0   301         e     2  2014      1     1    2\n",
       "3  2014-01-01      0   301         e     3  2014      1     1    2\n",
       "4  2014-01-01      0   301         e     4  2014      1     1    2"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "merged_data before\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Day</th>\n",
       "      <th>Direction</th>\n",
       "      <th>Incident</th>\n",
       "      <th>Location</th>\n",
       "      <th>Min Delay</th>\n",
       "      <th>Min Gap</th>\n",
       "      <th>Report Date</th>\n",
       "      <th>Route</th>\n",
       "      <th>Time</th>\n",
       "      <th>Vehicle</th>\n",
       "      <th>Report Date Time</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>daym</th>\n",
       "      <th>hour</th>\n",
       "      <th>time_of_day</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Report Date Time</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2014-01-02 06:31:31</th>\n",
       "      <td>Thursday</td>\n",
       "      <td>e</td>\n",
       "      <td>Late Leaving Garage</td>\n",
       "      <td>dundas and roncesvalles</td>\n",
       "      <td>4.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>505</td>\n",
       "      <td>06:31:00</td>\n",
       "      <td>4018</td>\n",
       "      <td>2014-01-02 06:31:31</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>morning_rush</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-01-02 12:43:43</th>\n",
       "      <td>Thursday</td>\n",
       "      <td>e</td>\n",
       "      <td>Utilized Off Route</td>\n",
       "      <td>king and shaw</td>\n",
       "      <td>20.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>504</td>\n",
       "      <td>12:43:00</td>\n",
       "      <td>4128</td>\n",
       "      <td>2014-01-02 12:43:43</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>midday</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-01-02 14:01:01</th>\n",
       "      <td>Thursday</td>\n",
       "      <td>w</td>\n",
       "      <td>Held By</td>\n",
       "      <td>bingham and kingston road</td>\n",
       "      <td>13.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>501</td>\n",
       "      <td>14:01:00</td>\n",
       "      <td>4016</td>\n",
       "      <td>2014-01-02 14:01:01</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>14</td>\n",
       "      <td>midday</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-01-02 14:22:22</th>\n",
       "      <td>Thursday</td>\n",
       "      <td>w</td>\n",
       "      <td>Investigation</td>\n",
       "      <td>king st. and roncesvalles</td>\n",
       "      <td>7.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>504</td>\n",
       "      <td>14:22:00</td>\n",
       "      <td>4175</td>\n",
       "      <td>2014-01-02 14:22:22</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>14</td>\n",
       "      <td>midday</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-01-02 16:42:42</th>\n",
       "      <td>Thursday</td>\n",
       "      <td>e</td>\n",
       "      <td>Utilized Off Route</td>\n",
       "      <td>bathurst and king</td>\n",
       "      <td>3.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>504</td>\n",
       "      <td>16:42:00</td>\n",
       "      <td>4080</td>\n",
       "      <td>2014-01-02 16:42:42</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>16</td>\n",
       "      <td>aft_rush</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          Day Direction             Incident  \\\n",
       "Report Date Time                                               \n",
       "2014-01-02 06:31:31  Thursday         e  Late Leaving Garage   \n",
       "2014-01-02 12:43:43  Thursday         e   Utilized Off Route   \n",
       "2014-01-02 14:01:01  Thursday         w              Held By   \n",
       "2014-01-02 14:22:22  Thursday         w        Investigation   \n",
       "2014-01-02 16:42:42  Thursday         e   Utilized Off Route   \n",
       "\n",
       "                                      Location  Min Delay  Min Gap  \\\n",
       "Report Date Time                                                     \n",
       "2014-01-02 06:31:31    dundas and roncesvalles        4.0      8.0   \n",
       "2014-01-02 12:43:43              king and shaw       20.0     22.0   \n",
       "2014-01-02 14:01:01  bingham and kingston road       13.0     19.0   \n",
       "2014-01-02 14:22:22  king st. and roncesvalles        7.0     11.0   \n",
       "2014-01-02 16:42:42          bathurst and king        3.0      6.0   \n",
       "\n",
       "                    Report Date Route      Time Vehicle    Report Date Time  \\\n",
       "Report Date Time                                                              \n",
       "2014-01-02 06:31:31  2014-01-02   505  06:31:00    4018 2014-01-02 06:31:31   \n",
       "2014-01-02 12:43:43  2014-01-02   504  12:43:00    4128 2014-01-02 12:43:43   \n",
       "2014-01-02 14:01:01  2014-01-02   501  14:01:00    4016 2014-01-02 14:01:01   \n",
       "2014-01-02 14:22:22  2014-01-02   504  14:22:00    4175 2014-01-02 14:22:22   \n",
       "2014-01-02 16:42:42  2014-01-02   504  16:42:00    4080 2014-01-02 16:42:42   \n",
       "\n",
       "                     year  month  daym  hour   time_of_day  target  \n",
       "Report Date Time                                                    \n",
       "2014-01-02 06:31:31  2014      1     2     6  morning_rush       0  \n",
       "2014-01-02 12:43:43  2014      1     2    12        midday       1  \n",
       "2014-01-02 14:01:01  2014      1     2    14        midday       1  \n",
       "2014-01-02 14:22:22  2014      1     2    14        midday       1  \n",
       "2014-01-02 16:42:42  2014      1     2    16      aft_rush       0  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "merged_data after dropping extraneous columns\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Direction</th>\n",
       "      <th>Min Delay</th>\n",
       "      <th>Report Date</th>\n",
       "      <th>Route</th>\n",
       "      <th>hour</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Report Date Time</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2014-01-02 06:31:31</th>\n",
       "      <td>e</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>505</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-01-02 12:43:43</th>\n",
       "      <td>e</td>\n",
       "      <td>20.0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>504</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-01-02 14:01:01</th>\n",
       "      <td>w</td>\n",
       "      <td>13.0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>501</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-01-02 14:22:22</th>\n",
       "      <td>w</td>\n",
       "      <td>7.0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>504</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2014-01-02 16:42:42</th>\n",
       "      <td>e</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2014-01-02</td>\n",
       "      <td>504</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    Direction  Min Delay Report Date Route  hour\n",
       "Report Date Time                                                \n",
       "2014-01-02 06:31:31         e        4.0  2014-01-02   505     6\n",
       "2014-01-02 12:43:43         e       20.0  2014-01-02   504    12\n",
       "2014-01-02 14:01:01         w       13.0  2014-01-02   501    14\n",
       "2014-01-02 14:22:22         w        7.0  2014-01-02   504    14\n",
       "2014-01-02 16:42:42         e        3.0  2014-01-02   504    16"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "result3\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Report Date</th>\n",
       "      <th>count</th>\n",
       "      <th>Route</th>\n",
       "      <th>Direction</th>\n",
       "      <th>hour</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>daym</th>\n",
       "      <th>day</th>\n",
       "      <th>Min Delay</th>\n",
       "      <th>target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>0</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>1</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>2</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>3</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2014-01-01</td>\n",
       "      <td>0</td>\n",
       "      <td>301</td>\n",
       "      <td>e</td>\n",
       "      <td>4</td>\n",
       "      <td>2014</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Report Date  count Route Direction  hour  year  month  daym  day  Min Delay  \\\n",
       "0  2014-01-01      0   301         e     0  2014      1     1    2        0.0   \n",
       "1  2014-01-01      0   301         e     1  2014      1     1    2        0.0   \n",
       "2  2014-01-01      0   301         e     2  2014      1     1    2        0.0   \n",
       "3  2014-01-01      0   301         e     3  2014      1     1    2        0.0   \n",
       "4  2014-01-01      0   301         e     4  2014      1     1    2        0.0   \n",
       "\n",
       "   target  \n",
       "0       0  \n",
       "1       0  \n",
       "2       0  \n",
       "3       0  \n",
       "4       0  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape of refactored dataset (2540912, 11)\n",
      "count of no delay  2486056\n",
      "count of delay  54856\n",
      "setting parameters for experiment  5\n",
      "early_stop is  True\n",
      "one_weight is  45.319673326527635\n",
      "epochs is  50\n",
      "es_monitor is  val_accuracy\n",
      "es_mode is  max\n"
     ]
    }
   ],
   "source": [
    "# master calls\n",
    "# get the path for data files\n",
    "path = get_path()\n",
    "print(\"path is\",path)\n",
    "# load route direction and delay data dataframes\n",
    "directions_df, merged_data = ingest_data(path)\n",
    "merged_data = prep_merged_data(merged_data)\n",
    "print(\"shape of pre refactored dataset\", merged_data.shape)\n",
    "# show record counts per year and per route / direction; a bare expression in the middle of a cell is\n",
    "# evaluated and then discarded, so these summaries need an explicit display() to be rendered\n",
    "display(merged_data['year'].value_counts())\n",
    "display(merged_data.groupby(['Route','Direction']).size().reset_index().rename(columns={0:'count'}).tail(50))\n",
    "# create refactored dataframe with one row for each route / direction / timeslot combination\n",
    "merged_data = prep_sparse_df(directions_df, merged_data)\n",
    "print(\"shape of refactored dataset\", merged_data.shape)\n",
    "# class balance of the refactored dataset; the two counts are passed to set_experiment_parameters below\n",
    "count_no_delay = merged_data[merged_data['target']==0].shape[0]\n",
    "count_delay = merged_data[merged_data['target']==1].shape[0]\n",
    "print(\"count of no delay \",count_no_delay)\n",
    "print(\"count of delay \",count_delay)\n",
    "# define parameters for the current experiment\n",
    "experiment_number = 5\n",
    "early_stop, one_weight, epochs,es_monitor,es_mode = set_experiment_parameters(experiment_number, count_no_delay, count_delay)\n",
    "print(\"early_stop is \",early_stop)\n",
    "print(\"one_weight is \",one_weight)\n",
    "print(\"epochs is \",epochs)\n",
    "print(\"es_monitor is \",es_monitor)\n",
    "print(\"es_mode is \",es_mode)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2540912, 11)"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sanity check: dimensions (rows, columns) of the refactored dataset\n",
    "merged_data.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define test / training sets; encode categorical values; process text field\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get training, validation and test data sets\n",
    "def get_train_validation_test(dataset, random_state=123):\n",
    "    '''Split dataset into train, validation and test subsets.\n",
    "\n",
    "    The test subset is carved out first using the notebook-level testproportion; the remainder\n",
    "    is then divided into train and validation subsets using the notebook-level trainproportion\n",
    "    as the train_size.\n",
    "\n",
    "    Args:\n",
    "        dataset: the data to split (anything train_test_split accepts)\n",
    "        random_state: seed applied to both splits so that re-running the notebook reproduces the\n",
    "            same subsets (previously only the train / validation split was seeded)\n",
    "\n",
    "    Returns:\n",
    "        tuple (dtrain, dvalid, test)\n",
    "    '''\n",
    "    train, test = train_test_split(dataset, test_size = testproportion, random_state = random_state)\n",
    "    dtrain, dvalid = train_test_split(train, random_state = random_state, train_size = trainproportion)\n",
    "    print(\"Through train test split. Test proportion:\")\n",
    "    print(testproportion)\n",
    "    return(dtrain,dvalid,test)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define feature categories <a name='definecategories' />\n",
    "<a href=#linkanchor>Back to link list</a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "all cols ['Report Date', 'count', 'Route', 'Direction', 'hour', 'year', 'month', 'daym', 'day', 'Min Delay', 'target']\n"
     ]
    }
   ],
   "source": [
    "# record the name of every column of the refactored dataframe\n",
    "allcols = merged_data.columns.tolist()\n",
    "print(\"all cols\",allcols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define the required column lists\n",
    "def def_col_lists():\n",
    "    '''Build the lists of columns that feed the model.\n",
    "\n",
    "    Reads the notebook-level allcols list and returns (collist, continuouscols, textcols), where\n",
    "    collist holds every column that is not text, not continuous and not explicitly excluded.\n",
    "    collist follows the order of allcols so the feature order is the same on every run; building\n",
    "    it through set arithmetic made the order depend on string hashing, which can differ between\n",
    "    kernel sessions.\n",
    "    '''\n",
    "    textcols = [] # columns to deal with as text - replace entries with multiple IDs and use embeddings, RNN\n",
    "    continuouscols = [] # columns to deal with as continuous values - no embeddings\n",
    "    # columns to exclude completely from the model; the former targetcontinuous / non-targetcontinuous\n",
    "    # branches listed exactly the same columns, so a single list is used\n",
    "    excludefromcolist = ['count','Report Date', 'target','count_md','Min Delay']\n",
    "    not_categorical = set(textcols) | set(excludefromcolist) | set(continuouscols)\n",
    "    # dict.fromkeys drops duplicate names (as the original set did) while keeping allcols order\n",
    "    collist = [col for col in dict.fromkeys(allcols) if col not in not_categorical]\n",
    "    for col in continuouscols:\n",
    "        print(\"col is\",col)\n",
    "        merged_data[col] = merged_data[col].astype(float)\n",
    "        print(\"got through one\")\n",
    "        # NOTE(review): superset_data is not defined in the visible part of the notebook - confirm it exists before adding continuous columns\n",
    "        superset_data[col] = superset_data[col].astype(float)\n",
    "    # print column list lengths and contents:\n",
    "    print(\"allcols\",len(allcols))\n",
    "    print(\"excludefromcolist\",len(excludefromcolist))\n",
    "    print(excludefromcolist)\n",
    "    print(\"textcols\",len(textcols))\n",
    "    print(textcols)\n",
    "    print(\"continuouscols\",len(continuouscols))\n",
    "    print(continuouscols)\n",
    "    print(\"collist\",len(collist))\n",
    "    print(collist)\n",
    "    return(collist,continuouscols,textcols)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Invoke Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define keras variables\n",
    "\n",
    "\n",
    "# X for the features used\n",
    "\n",
    "def get_keras_vars(dataset):\n",
    "    '''Extract the model input columns of dataset as arrays.\n",
    "\n",
    "    Uses the notebook-level collist, textcols, continuouscols, max_dict and verboseout.\n",
    "    Columns in collist and continuouscols are converted with np.array; columns in textcols are\n",
    "    padded with pad_sequences to the length recorded in max_dict.\n",
    "\n",
    "    Returns:\n",
    "        X: dict mapping column name to its array\n",
    "        dictlist: list of the same arrays, ordered collist, then textcols, then continuouscols\n",
    "\n",
    "    Each column is converted once and the resulting array is stored in both X and dictlist\n",
    "    (previously every column was converted twice), so the two containers share the same objects.\n",
    "    '''\n",
    "    X = {}\n",
    "    dictlist = []\n",
    "    for col in collist:\n",
    "        if verboseout:\n",
    "            print(\"cat col is\",col)\n",
    "        X[col] = np.array(dataset[col])\n",
    "        dictlist.append(X[col])\n",
    "\n",
    "    for col in textcols:\n",
    "        if verboseout:\n",
    "            print(\"text col is\",col)\n",
    "        X[col] = pad_sequences(dataset[col], maxlen=max_dict[col])\n",
    "        dictlist.append(X[col])\n",
    "\n",
    "    for col in continuouscols:\n",
    "        if verboseout:\n",
    "            print(\"cont col is\",col)\n",
    "        X[col] = np.array(dataset[col])\n",
    "        dictlist.append(X[col])\n",
    "\n",
    "    return X, dictlist\n",
    "\n",
    "def get_keras_list_only(X_in):\n",
    "    '''Return the values of the X_in dictionary as a list, in the dictionary's iteration order.\n",
    "\n",
    "    Prints each key and the shape of its value along the way, so every value must expose .shape.\n",
    "    '''\n",
    "    dictlist = []\n",
    "    for key, value in X_in.items():\n",
    "        print(\"X def loop key\",key)\n",
    "        print(\"value shape\",value.shape)\n",
    "        dictlist.append(value)\n",
    "    return dictlist\n",
    "\n",
    "def get_keras_np(X_in):\n",
    "    '''Pack the (key, value) pairs of the X_in dictionary into a numpy array of dtype object.'''\n",
    "    key_value_pairs = [(name, values) for name, values in X_in.items()]\n",
    "    return np.array(key_value_pairs, dtype=object)\n",
    "# np.array(list(result.items()), dtype=dtype)\n",
    "\n",
    "# the deployment API for Watson Studio can only take a list/array, not a dictionary, so define list-only version for input\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "allcols 11\n",
      "excludefromcolist 5\n",
      "['count', 'Report Date', 'target', 'count_md', 'Min Delay']\n",
      "textcols 0\n",
      "[]\n",
      "continuouscols 0\n",
      "[]\n",
      "collist 7\n",
      "['daym', 'year', 'hour', 'day', 'Direction', 'month', 'Route']\n",
      "fill empty xform\n",
      "col is  daym\n",
      "col is  year\n",
      "col is  hour\n",
      "col is  day\n",
      "col is  Direction\n",
      "col is  month\n",
      "col is  Route\n",
      "transform col is  daym\n",
      "after transform col is  daym\n",
      "transform col is  year\n",
      "after transform col is  year\n",
      "transform col is  hour\n",
      "after transform col is  hour\n",
      "transform col is  day\n",
      "after transform col is  day\n",
      "transform col is  Direction\n",
      "after transform col is  Direction\n",
      "transform col is  month\n",
      "after transform col is  month\n",
      "transform col is  Route\n",
      "after transform col is  Route\n",
      "Through train test split. Test proportion:\n",
      "0.2\n",
      "cat col is daym\n",
      "cat col is year\n",
      "cat col is hour\n",
      "cat col is day\n",
      "cat col is Direction\n",
      "cat col is month\n",
      "cat col is Route\n",
      "cat col is daym\n",
      "cat col is year\n",
      "cat col is hour\n",
      "cat col is day\n",
      "cat col is Direction\n",
      "cat col is month\n",
      "cat col is Route\n",
      "cat col is daym\n",
      "cat col is year\n",
      "cat col is hour\n",
      "cat col is day\n",
      "cat col is Direction\n",
      "cat col is month\n",
      "cat col is Route\n",
      "keras variables defined\n",
      "X_train_list [array([16,  3, 13, ..., 27, 22, 26], dtype=int64), array([1, 1, 3, ..., 0, 2, 3], dtype=int64), array([14,  3,  7, ..., 14, 10, 23], dtype=int64), array([1, 2, 1, ..., 3, 3, 5], dtype=int64), array([2, 2, 1, ..., 1, 0, 4]), array([ 2,  1, 10, ...,  7,  5,  4], dtype=int64), array([ 9,  3, 10, ...,  2, 12, 10])]\n"
     ]
    }
   ],
   "source": [
    "# master block to invoke pipeline\n",
    "\n",
    "# build fully qualified names for the files for saving the pipelines\n",
    "pipeline_path = get_pipeline_path()\n",
    "pipeline1_file_name = os.path.join(pipeline_path,'sc_delay_pipleline'+modifier+'.pkl')\n",
    "pipeline2_file_name = os.path.join(pipeline_path,'sc_delay_pipleline_keras_prep'+modifier+'.pkl')\n",
    "\n",
    "# define column lists:\n",
    "collist,continuouscols,textcols = def_col_lists()\n",
    "\n",
    "# create objects of the pipeline classes\n",
    "fe = fill_empty()\n",
    "ec = encode_categorical()\n",
    "pk = prep_for_keras_input()\n",
    "\n",
    "# need to implement the pipeline in two parts:\n",
    "# 1. fill empty + encode categoricals\n",
    "# 2. prep for Keras\n",
    "# because part 1 needs to be applied to the entire dataset and part 2 to the individual train, validate, and test sets\n",
    "\n",
    "\n",
    "sc_delay_pipeline = Pipeline([('fill_empty',fe),('encode_categorical',ec)])\n",
    "sc_delay_pipeline_keras_prep = Pipeline([('prep_for_keras',pk)])\n",
    "\n",
    "\n",
    "\n",
    "# provide the value for each parameter of each of the pipeline classes\n",
    "\n",
    "sc_delay_pipeline.set_params(fill_empty__collist = collist, fill_empty__continuouscols = continuouscols,\n",
    "                            fill_empty__textcols = textcols,encode_categorical__col_list = collist)\n",
    "sc_delay_pipeline_keras_prep.set_params(prep_for_keras__collist = collist,\n",
    "                            prep_for_keras__continuouscols = continuouscols,\n",
    "                            prep_for_keras__textcols = textcols)\n",
    "\n",
    "# fit the input dataset to the pipeline\n",
    "\n",
    "# first fit the first segment of pipeline on the whole dataset\n",
    "X = sc_delay_pipeline.fit_transform(merged_data)\n",
    "max_dict = ec.max_dict\n",
    "# save both pipeline segments; the with blocks close the files even if pickling raises\n",
    "with open(pipeline1_file_name,'wb') as pipeline1_file:\n",
    "    dump(sc_delay_pipeline, pipeline1_file)\n",
    "with open(pipeline2_file_name,'wb') as pipeline2_file:\n",
    "    dump(sc_delay_pipeline_keras_prep, pipeline2_file)\n",
    "# then split dataset\n",
    "dtrain, dvalid, test = get_train_validation_test(X)\n",
    "# then build the model inputs for each subset (done here with get_keras_vars rather than by running sc_delay_pipeline_keras_prep)\n",
    "\n",
    "X_train, X_train_list = get_keras_vars(dtrain)\n",
    "X_valid, X_valid_list = get_keras_vars(dvalid)\n",
    "X_test,X_test_list = get_keras_vars(test)\n",
    "\n",
    "print(\"keras variables defined\")\n",
    "print(\"X_train_list\",X_train_list)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define and fit model <a name='modelfit' />\n",
    "- the deep learning model requires a list of numpy arrays\n",
    "- XGBoost requires a single 2-D numpy array with one row per sample and one column per feature, so the training and test datasets need to be transformed before the XGBoost model is fit\n",
    "\n",
    "<a href=#linkanchor>Back to link list</a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get lists of lists for the training and test datasets: one inner list per feature array\n",
    "# (iterate over the arrays themselves instead of a hard-coded count of 7, so a change in the\n",
    "# number of model features can neither drop columns silently nor run past the end of the list)\n",
    "list_of_lists_train = [feature_values.tolist() for feature_values in X_train_list]\n",
    "list_of_lists_test = [feature_values.tolist() for feature_values in X_test_list]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 1.48 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# stack the per-feature lists into 2-D arrays and transpose them so that each row holds one sample\n",
    "xgb_X_train = np.transpose(np.array(list_of_lists_train))\n",
    "xgb_X_test = np.transpose(np.array(list_of_lists_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[16,  1, 14, ...,  2,  2,  9],\n",
       "       [ 3,  1,  3, ...,  2,  1,  3],\n",
       "       [13,  3,  7, ...,  1, 10, 10],\n",
       "       ...,\n",
       "       [27,  0, 14, ...,  1,  7,  2],\n",
       "       [22,  2, 10, ...,  0,  5, 12],\n",
       "       [26,  3, 23, ...,  4,  4, 10]])"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# inspect the transposed training array (numpy abbreviates the repr of large arrays)\n",
    "xgb_X_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 1min 40s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# fit the XGBoost classifier, weighting the positive class with one_weight (the same balancing\n",
    "# factor as used for the deep learning model), then save the fitted model\n",
    "model_path = get_model_path()\n",
    "xgb_model_file_name = 'sc_xgbmodel'+modifier+\"_\"+str(experiment_number)+'.txt'\n",
    "xgb_save_model_path = os.path.join(model_path, xgb_model_file_name)\n",
    "model = XGBClassifier(scale_pos_weight=one_weight)\n",
    "model.fit(xgb_X_train, dtrain['target'])\n",
    "model.save_model(xgb_save_model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "# run the fitted model on the test features and round every prediction\n",
    "y_pred = model.predict(xgb_X_test)\n",
    "xgb_predictions = list(map(round, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 80.30%\n"
     ]
    }
   ],
   "source": [
    "# score the predictions: share of test rows whose predicted class equals the target\n",
    "xgb_accuracy = accuracy_score(test['target'], xgb_predictions)\n",
    "accuracy_percent = xgb_accuracy * 100.0\n",
    "print(\"Accuracy: %.2f%%\" % accuracy_percent)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create charts for results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"# chart accuracy and loss for train and validation sets\\n\\nprint(modelfit.history.keys())\\n#  acc\\nplt.plot(modelfit.history['accuracy'])\\nplt.plot(modelfit.history['val_accuracy'])\\nplt.title('model accuracy')\\nplt.ylabel('accuracy')\\nplt.xlabel('epoch')\\nplt.legend(['train', 'validation'], loc='upper left')\\nplt.show()\\n# Loss\\nplt.plot(modelfit.history['loss'])\\nplt.plot(modelfit.history['val_loss'])\\nplt.title('model loss')\\nplt.ylabel('loss')\\nplt.xlabel('epoch')\\nplt.legend(['train', 'validation'], loc='upper left')\\nplt.show()\""
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The code below charts a Keras-style training history (modelfit.history). It is kept as comments\n",
    "# instead of a bare string literal so that running the cell no longer echoes the code as its output.\n",
    "# NOTE(review): modelfit is not created anywhere in the visible part of this notebook - confirm before re-enabling.\n",
    "\n",
    "# # chart accuracy and loss for train and validation sets\n",
    "#\n",
    "# print(modelfit.history.keys())\n",
    "# #  acc\n",
    "# plt.plot(modelfit.history['accuracy'])\n",
    "# plt.plot(modelfit.history['val_accuracy'])\n",
    "# plt.title('model accuracy')\n",
    "# plt.ylabel('accuracy')\n",
    "# plt.xlabel('epoch')\n",
    "# plt.legend(['train', 'validation'], loc='upper left')\n",
    "# plt.show()\n",
    "# # Loss\n",
    "# plt.plot(modelfit.history['loss'])\n",
    "# plt.plot(modelfit.history['val_loss'])\n",
    "# plt.title('model loss')\n",
    "# plt.ylabel('loss')\n",
    "# plt.xlabel('epoch')\n",
    "# plt.legend(['train', 'validation'], loc='upper left')\n",
    "# plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Confusion matrix <a name='confusionmatrix' />\n",
    "<a href=#linkanchor>Back to link list</a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAEWCAYAAAB1xKBvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dd5wV1f3/8debJqiAFAtNsRs1ibGgsRsV0JigUSNqFFswBpOfsUWjX7EmamzR2DAaESt2oxglKioWisaoYMNKswFiV9j9/P6YszC73N2dC4sL7PvJYx7ce2bOmTN3ymfOOXPvKiIwMzOrT7PGroCZmS0dHDDMzKwQBwwzMyvEAcPMzApxwDAzs0IcMMzMrJBGDRiS2kj6l6TZkm5fhHIOlPRwQ9atMUh6UNKAhcx7tqSPJb3f0PVqSiSdLunGgsuOknTE4q5TQ5P0jqRd0us/SfrHQpYzQdKODVq5+WUfKemSBi6zcH3zn9HiUGMf/F7SuYtrXQ2pUMCQdICk8ZI+lzQ9Xdi2bYD17wOsCnSKiH0XtpCIuCkiejdAfaqRtKOkkHRXjfQfpvRRBcspdBGKiN0iYuhC1LMHcBywYUSsVm7+Mtd1vaSzF2P5h0gavbjKt+oi4s8RUW/QK7XfI2KjiBjV0HWS1Ao4FfhrQ5bbUPVN14UpDVClKkOAX0lapQHLXCzqDRiSjgUuAf5MdnFfHbgC6NcA618DeD0i5jZAWYvLR8DWkjrl0gYArzfUCpRZlNbeGsCMiPhwIdbdYhHWu9jLa0iSmjd2HRrakvx5L4J+wKsRMbWxK/JdiIivgQeBgxu7LvWKiFonoD3wObBvHcssRxZQpqXpEmC5NG9HYArZ3e+HwHTg0DTvDOBbYE5ax+HA6cCNubJ7AgG0SO8PAd4CPgPeBg7MpY/O5dsaGAfMTv9vnZs3CjgLeCqV8zDQuZZtq6r/VcCglNY8pZ0GjMot+zdgMvAp8BywXUrvW2M7/5erxzmpHl8B66S0I9L8K4E7cuWfBzwCqEYdd0n5K1P516f0nwMTgE9Sud/L5XkH+CPwIvBN1eebmy/g4rTPZqflNgYGpu34Nq3rX7WVB2wFPJ3W/z9gxxrH1bXpeJgKnJ0+1+8BXwMVqfxP0vJtgAuBd1N9RgNt0rzbgfdT+hPARrn1XJ8+xxHAF8AuJfbxmsDj6VgYCfyd6sdgXduR319rA48CM4CPgZuAldK8E4A7a6z3MuCSWo67d4CTgYnALOCfQOsax+Qf03YPI7vxOwl4M61/ONAxV95B6bObAZySyt8lzTu9xvZum9veyWTnVl37vaqchboO1LL91wGn5t4PBY5Lr7uRXRN+m96vA8wknRfAHsALqf5PAz+o8blW1bdNKncW8ApwIjClxrLHkx3Ts4HbgNbAClQ/3z4Hui7KPkjzDwQeq+t6vCRM9QWMvsBcalxQaixzJvAssAqwctpJZ+UOlLlpmZbA7sCXQIdaDtaa73umg6NF2lGfAuuneV1IFwdyAQPomA6Cg1K+/dP7TrmT/E1gvXTQjALOrWXbdiQ70LcGxqS03YGHgCOoHjB+BXRK6zyO7GRuXWq7cvV4D9go5WlJ9QvQ8mStmEOA7cguQt3rqmfu/XpkF8hdU7knApOAVrmT4QWgB+nCW6O8PmRBbyWy4PE9oEuadz1wdokL3LzyyE7qGemzapbqMQNYOS1/D3B12qerAGOBI2vuy1z5l6fPphtZYNma+Rejw4C2zL9gvZDLdz3Zyb5NqkfrEtv6DHBRyr89WeC4MXdxqms78vtrnTR/ObLz4AlSQCA7Vr9gfgBpQXbh3KyW/fkO8HL6PDuS3VScXeOcOi+tqw1wDNk52D2lXQ3ckpbfkOyitn2ad1HKv0DAIOs9+IzsnGlJdjxvUs9+rypnoa8DJbZ/HLmb1LSPq4LUAWTn7225efem15umz3VLsuNkQKrjciXq
ey7ZjUKH9Lm9yIIBYyxZMOhIFlR+U+p8S2kLvQ9ydZ+5OC/2DTHVFzAOBN6vZ5k3gd1rXGzeyX2wX5ELOGmHblXzYK3lfU+qB4xPgL2pcZGjesA4CBhb4qJwSO4kz9+9/Bb4dy3bNu/AAN4A1gduTZ9LtYBRIu8s4IeltitXjzNLpB2Re9+L7O7pXWD/OtZV7QAG/g8YnnvfjOxOfsfcyXBYHeX9hCxYbQU0qzHvekpfOA7Lvf8jMKzGMg+RncCrkrVC2uTm7U+6u2LB1mKzdAz9sN6DOQtwAbTP1fWGOpZfnezEXSGXdjPzL6C1bkep/VVjuT2B/+bePwj8Or3eA5hYR73eIV2c0vvdgTdz+/pbcsGP7GK2c+59F7IWQQuylvCtuXkrpPylAsbJwN211Km2/V5VzkJfB0qs6w2gb+792mTnfjOy1v6RzD8vhwLHptdXkoJULu9rwA4l6vsW0Ce33BEsGDB+lXt/PnBVqfNtUfdBSlsXqKjvGG/sqb5+8xlA53r6SbuSXdCqvJvS5pUR1ccovgRWrGe9C4iIL4D9gN8A0yU9IGmDAvWpqlO33Pv8k0RF6zMMOBrYCbi75kxJx0l6JT3x9QlZt0vnesqcXNfMiBhLdmCLrIlbVLXPICIq07ryn0Gt646IR8m6Zi4HPpA0RFK7etaZL28NYF9Jn1RNZF0dXdK8lmT7sGre1WR3pqV0JusKeLPmDEnNJZ0r6U1Jn5Kd5FV56t1Oss9pVjq2quSPnbq2o2ZdVpF0q6SpqS431qjHULJWKOn/YXXUq2a9a55TH0XW752v5925Or5C1q23aso3r6y0rTNqWWcPSnzOBTXkdWAWWasRgIh4k+wOfROy1vb9wDRJ6wM7kLUUIPscjquxv3rUqEe+vvnPuNRxUs51YlH3QVuy1vASrb6A8QxZn/KedSwzjezDqrJ6SlsYX5B1xVSp9sRPRDwUEbuSnbCvAtcUqE9VnRZ1AG0YWWtkRER8mZ8haTuyu9FfkjWzVyLb+aqqei1l1pZeVe4gsibsNLJupaKqfQaSRHbi5D+DOtcdEZdGxGZkXWbrkfXD15Uvnz6Z7M58pdy0QkScm+Z9QzZuVDWvXURsVEv5H5Mdg2uXWOcBZAOku5AF6J5Vm1xwO6cDHSStkEtbveB21PSXtK4fREQ7sqCQr8c9wA8kbUzWwripjnpBtr/ydcqfUzW3aTKwW416to5s0Hh6vixJy5N1NZUymdKfc6l11tSQ14EXyY65vMfJnqpslbbrcbJB4g5k3aGQ1f+cGp/D8hFxS4l1TCfrPqrSo8QytSn1WSzqPvge2RjZEq3OgBERs8maU5dL2lPS8pJaStpN0vlpsVuAUyWtLKlzWr7Qc+wlvABsL2l1Se3JmsgASFpV0s/Tyf0N2R1HRYkyRgDrpUeBW0jaj6wP8f6FrBMAEfE22d3MKSVmtyXr2vgIaCHpNCB/R/4B0LOcJ6EkrUc2GPwrsm62EyVtUjD7cOCnknaW1JJsTOUbsn7lIuveQtKWKe8XzB+IrtqWteop4kbgZ5L6pFZA6/QoYveImE72oMGFktpJaiZpbUk75Mrvnh6trGodXQdcJKlrKu/HkpYj+9y/IbtbW57sSb7CIuJdYDxwhqRW6VHxnxXZjhLFtSUN1EvqxvwAW7Wur4E7yLq8xkbEe/VUb5Ck7pI6An8iG3StzVXAOZLWAEjnYtVTjHcAe0jaNn2mZ1L7eX8TsIukX6Zzp1PumKtvvzfkdWAE2bmW9zhZC/+J9H4U8Duy7suqY/Ma4Dfp2JWkFST9VFJbFjQcOFlSh7S/ji6jfh8AndI1qsqi7oMdyLotl2j1XsAi4iLgWLLnoj8ii6RHk90xQXZRG092V/AS8HxKK1tEjCQ7MV4kG3TNX+SbkV34ppH16+9Adsdfs4wZZHdwx5FdSE4E9oiIjxemTjXKHh0Rpe6aHiLb2a+TNcW/pnoTt+pLiTMkPV/felIX4I3AeRHxv4h4g+yi
MSxdKOur52tkgeYysjv0nwE/i4hv68ubtCM7+WYx/8mOC9K8a4ENU9P7nlKZI2Iy2Z3/n5h/zJzA/OPtYKAV858CuoP53TyPkj3d9b6kqn12PNmxNY5s35+Xyroh1W9qKuvZgtuXdwDZIOlMYHAqs+h25J1BNnA5G3gAuKvEMkOB71N/dxRkgeVhsi7Jt6j7nPobcB/wsKTPyD6HLdM2TAAGpfKmk33eJb9DkILY7mTnzkyyG7gfptn17fcGuw4A/wI2kJTvSnqcLChXBYzRZDcJVe+JiPHAr8m6U2eRPehxSC3rOJPsc3gb+A/ZMfhNkcpFxKtkAfKt9Hl0ZRH2gaTWZJ972d/B+q5VPYpmZouZpNXJulJXi4hP61juHbLB9P98V3Vb0kgaSPZF1GO+o/UdBfSPiJotm+9i3b8DekREOd3OjWJZ/NKP2RIndUceS/a0TK3BwjIRMWRxli+pC1kX2zNkTygdR9Yy+c5FxGWNsd6F4YBhtpilcbcPyLrP+jZydSzTiuzpvDXJHtm9lewXLKwO7pIyM7NC/PPmZmZWyFLfJTXn47fcRLIFHLH5CfUvZE3O0HfuVP1L1a2ca07Lzmst8vqWJG5hmJlZIUt9C8PM7DtVWer7wk2DA4aZWTkqluQ/37N4OWCYmZUh+7WapskBw8ysHJUOGGZmVoRbGGZmVogHvc3MrBC3MMzMrIjwU1JmZlaIB73NzKwQd0mZmVkhHvQ2M7NC3MIwM7NCPOhtZmaFeNDbzMyKiPAYhpmZFeExDDMzK8RdUmZmVohbGGZmVkjFnMauQaNxwDAzK4e7pMzMrBB3SZmZWSFNuIXRrLErYGa2VKmsLD7VQVJrSWMl/U/SBElnpPTTJU2V9EKads/lOVnSJEmvSeqTS99M0ktp3qWSlNKXk3RbSh8jqWcuzwBJb6RpQJFNdwvDzKwM0XCD3t8AP4mIzyW1BEZLejDNuzgiLsgvLGlDoD+wEdAV+I+k9SL7JuGVwEDgWWAE0Bd4EDgcmBUR60jqD5wH7CepIzAY2BwI4DlJ90XErLoq7BaGmVk5orL4VFcxmc/T25Zpijqy9ANujYhvIuJtYBLQS1IXoF1EPBMRAdwA7JnLMzS9vgPYObU++gAjI2JmChIjyYJMnRwwzMzKUUaXlKSBksbnpoH5oiQ1l/QC8CHZBXxMmnW0pBclXSepQ0rrBkzOZZ+S0rql1zXTq+WJiLnAbKBTHWXVyQHDzKwcZbQwImJIRGyem4ZUKyqiIiI2AbqTtRY2JuteWhvYBJgOXJgWV6na1JG+sHlq5YBhZlaOBhr0zouIT4BRQN+I+CAFkkrgGqBXWmwK0COXrTswLaV3L5FeLY+kFkB7YGYdZdXJAcPMrBwNNIYhaWVJK6XXbYBdgFfTmESVvYCX0+v7gP7pyac1gXWBsRExHfhM0lZpfOJg4N5cnqonoPYBHk3jHA8BvSV1SF1evVNanfyUlJlZOeY22B9Q6gIMldSc7OZ9eETcL2mYpE3IuojeAY4EiIgJkoYDE4G5wKCY/1vrRwHXA23Ino6qetrqWmCYpElkLYv+qayZks4CxqXlzoyImfVV2AHDzKwcDfRN74h4EfhRifSD6shzDnBOifTxwMYl0r8G9q2lrOuA68qosgOGmVlZmvA3vR0wzMzK4d+SMjOzQtzCMDOzQtzCMDOzQhruKamljgOGmVk5ot4vRC+zHDDMzMrhMQwzMyvEAcPMzArxoLeZmRVSUVH/MssoBwwzs3K4S8rMzApxwDAzs0I8hmFmZkVEpb+HYWZmRbhLyszMCvFTUmZmVohbGGZmVkgTDhjNGrsCtqCKigr2OWQQvz1h8CKXde+Ikey+3+Hsvt/h3Dti5Lz0U86+kD77HMLeAwax94BBvPr6m4u8LmsYux76U8556GL+/PAl9D7spwvMX77dCvz+6hM5+8GLGHzPuXRbr8cir7NFqxb89u/Hcv6ov3PaPX+hc/eVq81vvWIbLnl2CAed
ccQir2upF1F8qoOk1pLGSvqfpAmSzkjpHSWNlPRG+r9DLs/JkiZJek1Sn1z6ZpJeSvMulaSUvpyk21L6GEk9c3kGpHW8IWlAkU13wFgC3Xj7vazVc/Wy8hxy9IlMnf5BtbTZn37Glf+8mVuuuYRbrrmEK/95M7M//Wze/OMGHc6dQy/nzqGXs8F6azdI3W3RdFuvBzv234Uz+v2RU3c7lk1+sjmr9uxSbZmfDdqb9ya+zam7HcuQ4y7jwMGHFS6/c/eVOenWMxZI3/6XO/PF7M85ccejeeja+/nlSdX/rPTex+3Pq2MmLtxGLWsqK4tPdfsG+ElE/BDYBOgraSvgJOCRiFgXeCS9R9KGQH9gI6AvcIWk5qmsK4GBwLpp6pvSDwdmRcQ6wMXAeamsjsBgYEugFzA4H5hq0+gBQ9IGkv6YouLf0uvvNXa9Gsv7H37EE0+PZe+fzbt54L0p0zjy2FP55WG/4+CjjuetdycXKuupMc/x4y1+RPt2bWnfri0/3uJHPDXmucVVdWsAXdfpzpv/fZ1vv/6WyopKXh0zgc369Kq+zLrdmfDUSwBMf3MqK3dfhXad2wOw9Z7bM/ieczlzxAUc8ucjUbNip/imvXsx+s5RAIwb8Qwbbv39efN6brwW7Tq35+Un/9cAW7gMqIziUx0i83l62zJNAfQDhqb0ocCe6XU/4NaI+CYi3gYmAb0kdQHaRcQzERHADTXyVJV1B7Bzan30AUZGxMyImAWMZH6QqVWjBgxJfwRuBQSMBcal17dIOqkx69ZYzvvb1Rz728OR5u+aM86/lD/94SiGX3cZxx99BGdfcHmhsj746GNWW2V+18KqK3fmg48+nvf+0quHstfBR3He367m22+/bbiNsIU25bX3WL/Xhqyw0oq0at2KH+60KR27dK62zORX3mHzvlsBsNYP16FTt5XpuFonuqzdjV57bMPZ+5zCabsfT2VFJVvvuV2h9XZYtSMzp2XHRmVFJV999iUrdmiLJPqfOoDb/nxDw27o0qyiovAkaaCk8blpYL4oSc0lvQB8SHYBHwOsGhHTAdL/q6TFuwH5u8UpKa1bel0zvVqeiJgLzAY61VFWnRp70PtwYKOImJNPlHQRMAE4t1Sm9KEPBLjiwrM54uD9F3c9vxOjnhpDxw4rsdEG6zL2+RcB+PLLr3jhpVc49tQ/z1vu2znZx3X3Aw9z4/B7AXhv6jSOOv7/aNmiJd26rsqlfzmtZBdq6trkmN8cSudOHZgzZw6nn3cp1954O0cdduBi3kKrz/Q3p/LAVfdw4o2D+eaLr3nvlXeorPEY5/1X3s2vBh/GmSMuYMqr7/HuhLepqKhgo21+QM/vr8Xg+84DoNVyrfh0xmwAfn/1iXTusQotWragU9fOnDniAgBG/vMBnrz9sXnHRV5EsPNBfXnxseeZOX3GYt7ypUeUMegdEUOAIXXMrwA2kbQScLekjesobsGdlLVIaktf2Dy1auyAUQl0Bd6tkd4lzSspvxPmfPzWMvO1y/++OJFRo5/lyWfG8c23c/jiiy85+awLaNt2Be4cumCrYq+f9mavn/YGsjGMc045jm5dVp03f7VVOjPuvy/Oe//BRx+zxY9+AMDKnTsC0KpVK/b8aW+uv+XOxblpVoYnhj/CE8MfAWCfEw5Y4GL99edf8Y8T5h8PF4y+ko8mf8j6vTbkqTtHcfv5Ny1Q5qVHng9kYxhHXHA05/av/kDFzPdn0LFrZ2a9P5NmzZvRpu3yfPHJ56y96Xqsv8X3+MlBfWm9fGtatGzB119+ze3n3djQm730WAzf9I6ITySNIusW+kBSl4iYnrqbPkyLTQHyTzh0B6al9O4l0vN5pkhqAbQHZqb0HWvkGVVfPRt7DOMY4BFJD0oakqZ/kw30/L9Grtt37g9HHcoj99zIw3cO5a9nnESvzX7I3/7yf3TrshoPPfokkN31vfrGW4XK22bLzXh67PPM/vQzZn/6GU+PfZ5tttwMgI8+njmv
vEefeJp111pj8WyUla1tp3YAdOzamc36bsWz942uNn/5dsvTvGV2r7dD/114fcxEvv78KyY+9RKb7/bjeflXaL8inbpVf9qpNv8dOY5t994RgC12/zGvPP0yAFcf8zeO3eY3HL/tUdz65xt46q7Hm3awgOy3pIpOdZC0cmpZIKkNsAvwKnAfUPXU0gDg3vT6PqB/evJpTbLB7bGp2+ozSVul8YmDa+SpKmsf4NE0zvEQ0FtShzTY3Tul1alRWxgR8W9J65GN0ncjayZNAcalppoB5w0+kbMu+DtXD72FuXPnstvOO7DBumvVm699u7Ycecj+9D8ii72/OfQA2rdrC8AfzzifWZ/MJiJYf921GHzC7xbrNlhxv7vyBFbs0JaKuRUM+79r+PLTL9jpwKwl+dhND9Nlne4MvPD3VFZWMu2NyVx74hUATJs0hTsvvJkThp1GMzWjYu5cbjjtGmZM/ajedT4x/BEGXvR7zh/1d7745HOu+N3Fi3Ubl2oN18LoAgxNTzo1A4ZHxP2SngGGSzoceA/YFyAiJkgaDkwE5gKDctfJo4DrgTbAg2kCuBYYJmkSWcuifyprpqSzyMaNAc6MiJn1VVixlP9B82WpS8oazhGbn9DYVbAl0NB37izVd1+WL07rX/ias8KZty7y+pYkjT2GYWa2dPHPm5uZWSH+eXMzMyuinMdqlzUOGGZm5XALw8zMCnHAMDOzQvwHlMzMrAj/TW8zMyvGAcPMzArxU1JmZlaIWxhmZlaIA4aZmRURFe6SMjOzItzCMDOzIvxYrZmZFeOAYWZmhTTdIQwHDDOzcsTcphsxHDDMzMrRdOMFzRq7AmZmS5OojMJTXST1kPSYpFckTZD0/1L66ZKmSnohTbvn8pwsaZKk1yT1yaVvJumlNO9SSUrpy0m6LaWPkdQzl2eApDfSNKDItruFYWZWjoZrYcwFjouI5yW1BZ6TNDLNuzgiLsgvLGlDoD+wEdAV+I+k9SKiArgSGAg8C4wA+gIPAocDsyJiHUn9gfOA/SR1BAYDmwOR1n1fRMyqq8JuYZiZlaGhWhgRMT0ink+vPwNeAbrVkaUfcGtEfBMRbwOTgF6SugDtIuKZiAjgBmDPXJ6h6fUdwM6p9dEHGBkRM1OQGEkWZOrkgGFmVo7KMqaCUlfRj4AxKeloSS9Kuk5Sh5TWDZicyzYlpXVLr2umV8sTEXOB2UCnOsqqkwOGmVkZYm7xSdJASeNz08Ca5UlaEbgTOCYiPiXrXlob2ASYDlxYtWip6tSRvrB5auUxDDOzMkQZLYeIGAIMqW2+pJZkweKmiLgr5fkgN/8a4P70dgrQI5e9OzAtpXcvkZ7PM0VSC6A9MDOl71gjz6j6tsctDDOzcjRQl1QaS7gWeCUiLsqld8ktthfwcnp9H9A/Pfm0JrAuMDYipgOfSdoqlXkwcG8uT9UTUPsAj6ZxjoeA3pI6pC6v3imtTm5hmJmVoZwWRj22AQ4CXpL0Qkr7E7C/pE3IuojeAY4EiIgJkoYDE8mesBqUnpACOAq4HmhD9nTUgyn9WmCYpElkLYv+qayZks4CxqXlzoyImfVV2AHDzKwMDRUwImI0pccSRtSR5xzgnBLp44GNS6R/DexbS1nXAdcVrS84YJiZlSUqSl3jmwYHDDOzMjRgl9RSxwHDzKwMUekWhpmZFeAWhpmZFRLhFoaZmRXgFoaZmRVS6aekzMysCA96m5lZIQ4YZmZWSNT7m67LLgcMM7MyuIVhZmaF+LFaMzMrpMJPSZmZWRFuYZiZWSEewzAzs0L8lJSZmRXiFoaZmRVSUdmssavQaBwwzMzK0JS7pJpuqDQzWwiVocJTXST1kPSYpFckTZD0/1J6R0kjJb2R/u+Qy3OypEmSXpPUJ5e+maSX0rxLJSmlLyfptpQ+RlLPXJ4BaR1vSBpQZNsdMMzMyhChwlM95gLHRcT3gK2AQZI2BE4CHomIdYFH0nvSvP7ARkBf4ApJzVNZVwIDgXXT
1DelHw7Mioh1gIuB81JZHYHBwJZAL2BwPjDVxgHDzKwMEcWnusuJ6RHxfHr9GfAK0A3oBwxNiw0F9kyv+wG3RsQ3EfE2MAnoJakL0C4inomIAG6okaeqrDuAnVProw8wMiJmRsQsYCTzg0ytlvoxjDZdt2vsKtgSqGXzpf7QtsVgaP2L1Ku+rqY8SQPJ7vyrDImIISWW6wn8CBgDrBoR0yELKpJWSYt1A57NZZuS0uak1zXTq/JMTmXNlTQb6JRPL5GnVj6rzMzKUM5TUik4LBAg8iStCNwJHBMRn6bhh5KLllpFHekLm6dW7pIyMytDlDHVR1JLsmBxU0TclZI/SN1MpP8/TOlTgB657N2BaSm9e4n0ankktQDaAzPrKKtODhhmZmVowKekBFwLvBIRF+Vm3QdUPbU0ALg3l94/Pfm0Jtng9tjUffWZpK1SmQfXyFNV1j7Ao2mc4yGgt6QOabC7d0qrk7ukzMzK0IA/PrgNcBDwkqQXUtqfgHOB4ZIOB94D9s3WGxMkDQcmkj1hNSgiKlK+o4DrgTbAg2mCLCANkzSJrGXRP5U1U9JZwLi03JkRMbO+CiuW8m+htGjVbeneAFssPOhtpXz11buLfLV/crV9Cl9ztnv/jmXqd0R8VpmZlSFKjhc3DQ4YZmZlmOu/h2FmZkW4hWFmZoVUNnYFGpEDhplZGdzCMDOzQtzCMDOzQircwjAzsyKa8F9odcAwMytHpVsYZmZWRFP+aQkHDDOzMnjQ28zMCqms/e9VLPMcMMzMylBR/yLLLAcMM7My+CkpMzMrxE9JmZlZIX5KyszMCnGXlJmZFdKUH6tt1tgVMDNbmlSo+FQfSddJ+lDSy7m00yVNlfRCmnbPzTtZ0iRJr0nqk0vfTNJLad6lUvbsr6TlJN2W0sdI6pnLM0DSG2kaUGTbHTDMzMpQWcZUwPVA3xLpF0fEJmkaASBpQ6A/sFHKc4Wk5mn5K4GBwLppqirzcGBWRKwDXAycl8rqCAwGtgR6AYMldaivsg4YZmZlaMiAERFPADMLrrofcGtEfBMRbwOTgF6SugDtIuKZiAjgBmDPXJ6h6fUdwM6p9dEHGBkRMyNiFjCS0oGrGgcMM7MyhIpPkgZKGp+bBhZczdGSXjbMpn0AABEfSURBVExdVlV3/t2AybllpqS0bul1zfRqeSJiLjAb6FRHWXVywDAzK0M5LYyIGBIRm+emIQVWcSWwNrAJMB24MKWXGhWJOtIXNk+tHDDMzMpQUca0MCLig4ioiIhK4BqyMQbIWgE9cot2B6al9O4l0qvlkdQCaE/WBVZbWXVywDAzK0Olik8LI41JVNkLqHqC6j6gf3ryaU2ywe2xETEd+EzSVml84mDg3lyeqieg9gEeTeMcDwG9JXVIXV69U1qd/D0MM7MyNOT3MCTdAuwIdJY0hezJpR0lbULWRfQOcCRAREyQNByYCMwFBkVEVUPmKLInrtoAD6YJ4FpgmKRJZC2L/qmsmZLOAsal5c6MiHoH35UFm6VXi1bdlu4NsMWiZXPfC9mCvvrq3UX+nvaFq/+q8DXnuPduXKa+F+6zysysDE35DtUBw8ysDP4tKTMzK8R/QMnMzAqpbMKdUg4YZmZlaMq/VuuAYWZWhqbbvnDAMDMri1sYZmZWyFw13TaGA4aZWRmabrhwwDAzK4u7pMzMrBA/VmtmZoU03XDhgGFmVhZ3SZmZWSEVTbiN4YBhZlYGtzDMzKyQcAvDzMyKaMotDP9N70Z0zZALmTblf7zw30dKzv/Zz3rz/HMjGT/uYZ59ZgTbbL3FIq+zVatW3HzTlbw6cTRPj/4Xa6yR/e341VfvxphnH2T8uIf53wuPMvDXBy3yumzhDBp0KOPHP8xzz43k6KMPW2B+u3ZtueOOaxkz5kGee24kBx207yKvs1WrVgwb9ndefvlxnnjiHlZfff5x8dRT9/PssyN47rmRHHHEgYu8rqVdJVF4WtY4YDSiG24Yzk/3qP0EfPTR
0Wy62a5svkVvfj3wOK6++oLCZa+xRnceGXn7AumHHbo/s2bNZoMNt+WSS6/hL38+BYDp0z9ku+37sfkWvdl6mz048YRBdOmyavkbZYtkww3X49BD92e77X5Or1592W23nVl77Z7VljnyyIN59dU32HLL3ejTZz/OPfdUWrZsWaj81VfvzkMP3bpA+iGH7MesWbPZeOMduOyyaznnnJOA7LjYaadfsNVWu7P99v04/vij6NJllUXezqVZlDEtaxwwGtGTo8cwc9Yntc7/4osv571eYfnlyf/99QMO+AXPPHU/48c9zBWXn0ezZsV25c9/1pthw7JAcuedD/CTnbYFYM6cOXz77bcALLfccoXLs4a1wQbrMHbsf/nqq6+pqKjgySfH0K9fn2rLRAQrrrgiACussAKzZn3C3LlzAejffy+efPJenn12BJdd9ufC+3GPPXblppvuBOCuu0aw447bADWPi1Y+LoC5ROGpPpKuk/ShpJdzaR0ljZT0Rvq/Q27eyZImSXpNUp9c+maSXkrzLpWklL6cpNtS+hhJPXN5BqR1vCFpQJFt995fwvXr15eXX3qc++4dyq9/fRyQXVR+ue/P2W6HPdl8i95UVFRwwAG/KFRe126rMXnKNAAqKiqYPftTOnXKjsfu3bvy/HMjeeetcfz1gsuZPv2DxbNRVqsJE15n22170bHjSrRp05q+fXeie/eu1Za56qqhbLDBOrz11jjGj3+I448/g4hg/fXXYZ999mCnnfZmq612p6Kikv799yy03q5dV2NK7rj49NPPcsdFF8aO/TdvvPEsF154FdOnf9iwG72UiTL+FXA90LdG2knAIxGxLvBIeo+kDYH+wEYpzxWSmqc8VwIDgXXTVFXm4cCsiFgHuBg4L5XVERgMbAn0AgbnA1NtlthBb0mHRsQ/a5k3kOzDQc3b06zZCt9p3b5L9977b+69999st+2WnHH6CfTZrT8/2WlbNv3R93n2mREAtGnTmo8++hiAO27/Bz17rk6rVi1ZvUc3xo97GIDLLvsHQ28YTrrxqKaq4TJlyjQ23WxXunRZlbvuuJY773qADz/8+LvZUAPgtdcmceGFV3H//TfxxRdf8OKLE+e1HqrsuusOvPjiBPr27c9aa63BAw/cxFNPjWWnnbZh002/z+jR9wHVj4vbbruaNdboQatWrejRoyvPPpsdO5df/k+GDbu9luMiOzCmTJlOr1596dJlFYYPv4a77x7RpI+Lhhz0jogn8nf9ST9gx/R6KDAK+GNKvzUivgHeljQJ6CXpHaBdRDwDIOkGYE/gwZTn9FTWHcDfU+ujDzAyImamPCPJgswtddV3iQ0YwBlAyYAREUOAIQAtWnVbFrsKF/Dk6DGstdYadOrUAUkMu/F2Tjn13AWW22ffI4BsDOO6f1zMzrtWHxCdOmU6Pbp3ZerU6TRv3pz27dsxc+asastMn/4BEya+zrbbbslddz2w+DbKSho69DaGDr0NgDPOOIGpU9+vNv+gg/blwguvAOCtt97lnXcms/76ayOJG2+8g9NOO3+BMvfb70ggG8O45poL6NOnf7X5U6dOp3v3rkyd+j7NmzenXbu2zJxZvbt0+vQPmTjxdbbZphd33z2iwbZ3aVPOY7X5m9tkSLp+1WXViJgOEBHTJVUNGnUDns0tNyWlzUmva6ZX5ZmcyporaTbQKZ9eIk+tGrVLStKLtUwvAU1+xDU/2PmjTTamVauWzJgxi0cfG80v9tqDlVfuBECHDiux+ur17msA/nX/w/Oeqtl775/y2KinAOjWrQutW7cGYKWV2rP11lvw+utvNuDWWFFV+7VHj67069eX4cPvrTZ/8uSp88YYVlmlM+uttxZvv/0ejz32FHvttXvuuGhf+Lh44IH/cOCBewPwi1/szuOPPw1At26r0br1cgCstFI7fvzjzZv8cVFZxhQRQyJi89xUX7Coy4LNwGxsvbb0hc1Tq8ZuYaxK1jSaVSNdwNPffXW+WzcO
u5wdtv8xnTt35J23xnPGmRfMe9plyDXD+MVeu/OrX+3DnDlz+fqrrzngwKMAeOWVNzjt9PN5cMQtNGsm5syZy+9/fwrvvTe13nVe989bGXr9pbw6cTSzZn3CAb/6LQDf22Adzj//NCJAgosuuoqXX3518W281eqWW66iY8cOzJkzh2OOOY1PPvl03uOs//jHTZx77qUMGXIh48Y9hCROOeVcZsyYxYwZszjjjAv417+G0axZM+bMmcsf/vB/hY6L66+/jeuuu5iXX36cWbM+4aCDjgZg/fXX4dxzTyUikMQllwxhwoTXFuv2L+kqYrF3anwgqUtqXXQBqgaNpgA9cst1B6al9O4l0vN5pkhqAbQHZqb0HWvkGVVfxRSLf+NrX7l0LfDPiBhdYt7NEXFAfWU0lS4pK0/L5o19L2RLoq++erfUnXVZDlhjr8LXnJvfvbve9aUxjPsjYuP0/q/AjIg4V9JJQMeIOFHSRsDNZIPUXckGxNeNiApJ44DfAWOAEcBlETFC0iDg+xHxG0n9gV9ExC/ToPdzwKapGs8Dm1WNadSmUc+qiDi8jnn1Bgszs+9aQ/40iKRbyO70O0uaQvbk0rnAcEmHA+8B+wJExARJw4GJwFxgUERUpKKOInviqg3ZYPeDKf1aYFgaIJ9J9pQVETFT0lnAuLTcmfUFC2jkFkZDcAvDSnELw0ppiBbGfmvsWfiac9u79yzy+pYkPqvMzMqwLP7kR1EOGGZmZfCv1ZqZWSHfwVNSSywHDDOzMrhLyszMCmnKfw/DAcPMrAwewzAzs0LcJWVmZoUs7d9dWxQOGGZmZahwC8PMzIpwl5SZmRXiLikzMyvELQwzMyvEj9WamVkh/mkQMzMrxF1SZmZWiAOGmZkV4qekzMyskKbcwmjW2BUwM1uaRBn/ipD0jqSXJL0gaXxK6yhppKQ30v8dcsufLGmSpNck9cmlb5bKmSTpUklK6ctJui2lj5HUc2G33QHDzKwMFVFZeCrDThGxSURsnt6fBDwSEesCj6T3SNoQ6A9sBPQFrpDUPOW5EhgIrJumvin9cGBWRKwDXAyct7Db7oBhZlaGiCg8LYJ+wND0eiiwZy791oj4JiLeBiYBvSR1AdpFxDORrfiGGnmqyroD2Lmq9VEuBwwzszJUEoUnSQMljc9NA0sUGcDDkp7LzV81IqYDpP9XSendgMm5vFNSWrf0umZ6tTwRMReYDXRamG33oLeZWRnK+aZ3RAwBhtSz2DYRMU3SKsBISa/WsWyplkHUkV5XnrK5hWFmVobKiMJTERExLf3/IXA30Av4IHUzkf7/MC0+BeiRy94dmJbSu5dIr5ZHUgugPTCz7A3HAcPMrCwN+ZSUpBUkta16DfQGXgbuAwakxQYA96bX9wH905NPa5INbo9N3VafSdoqjU8cXCNPVVn7AI/GQg6wuEvKzKwMZT79VJ9VgbvTGHQL4OaI+LekccBwSYcD7wH7AkTEBEnDgYnAXGBQRFSkso4CrgfaAA+mCeBaYJikSWQti/4LW1kt7d9abNGq29K9AbZYtGzueyFb0FdfvbtQTwflrbfy5oWvOa9/NH6R17ck8VllZlYG/7y5mZkVUnQwe1nkgGFmVga3MMzMrJCKeWPMTY8DhplZGZb2B4UWhQOGmVkZmvLPmztgmJmVwS0MMzMrxE9JmZlZIX5KyszMCmngnwZZqjhgmJmVwWMYZmZWiMcwzMysELcwzMysEH8Pw8zMCnELw8zMCvFTUmZmVogHvc3MrBB3SZmZWSH+preZmRXiFoaZmRXSlMcw1JSj5bJG0sCIGNLY9bAli48LayjNGrsC1qAGNnYFbInk48IahAOGmZkV4oBhZmaFOGAsW9xPbaX4uLAG4UFvMzMrxC0MMzMrxAHDzMwKccBYRkjqK+k1SZMkndTY9bHGJ+k6SR9Kermx62LLBgeMZYCk5sDlwG7AhsD+kjZs3FrZEuB6oG9jV8KWHQ4Yy4ZewKSIeCsivgVu
Bfo1cp2skUXEE8DMxq6HLTscMJYN3YDJufdTUpqZWYNxwFg2qESan5c2swblgLFsmAL0yL3vDkxrpLqY2TLKAWPZMA5YV9KakloB/YH7GrlOZraMccBYBkTEXOBo4CHgFWB4RExo3FpZY5N0C/AMsL6kKZIOb+w62dLNPw1iZmaFuIVhZmaFOGCYmVkhDhhmZlaIA4aZmRXigGFmZoU4YFijkFQh6QVJL0u6XdLyi1DW9ZL2Sa//UdcPL0raUdLWufe/kXTwwq7brClxwLDG8lVEbBIRGwPfAr/Jz0y/wFu2iDgiIibWsciOwLyAERFXRcQNC7Mus6bGAcOWBE8C66S7/8ck3Qy8JKm5pL9KGifpRUlHAijzd0kTJT0ArFJVkKRRkjZPr/tKel7S/yQ9IqknWWD6Q2rdbCfpdEnHp+U3kfRsWtfdkjrkyjxP0lhJr0va7jv9dMyWEC0auwLWtElqQfZ3PP6dknoBG0fE25IGArMjYgtJywFPSXoY+BGwPvB9YFVgInBdjXJXBq4Btk9ldYyImZKuAj6PiAvScjvnst0A/C4iHpd0JjAYOCbNaxERvSTtntJ3aejPwmxJ54BhjaWNpBfS6yeBa8m6isZGxNspvTfwg6rxCaA9sC6wPXBLRFQA0yQ9WqL8rYAnqsqKiDr/LoSk9sBKEfF4ShoK3J5b5K70/3NAz2KbaLZsccCwxvJVRGyST5AE8EU+ieyO/6Eay+1O/T/frgLLlOOb9H8FPm+sifIYhi3JHgKOktQSQNJ6klYAngD6pzGOLsBOJfI+A+wgac2Ut2NK/wxoW3PhiJgNzMqNTxwEPF5zObOmzHdKtiT7B1n3z/PKmh8fAXsCdwM/AV4CXqfEhT0iPkpjIHdJagZ8COwK/Au4Q1I/4Hc1sg0ArkqP+L4FHLo4NspsaeVfqzUzs0LcJWVmZoU4YJiZWSEOGGZmVogDhpmZFeKAYWZmhThgmJlZIQ4YZmZWyP8HFxl13Wyi5WEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Confusion matrix of the XGBoost predictions on the test set.\n",
    "# In the matrix returned by confusion_matrix, rows are the true labels\n",
    "# and columns are the predicted labels.\n",
    "cfmap = metrics.confusion_matrix(y_true=test['target'],  # True labels\n",
    "                                 y_pred=xgb_predictions)\n",
    "\n",
    "label = [\"0\", \"1\"]\n",
    "# fmt='d' annotates each cell with its exact integer count; the default\n",
    "# '.2g' format abbreviates large counts to scientific notation.\n",
    "sns.heatmap(cfmap, annot=True, fmt='d', xticklabels=label, yticklabels=label)\n",
    "plt.xlabel(\"Prediction\")\n",
    "# Label the y-axis too so the orientation of the matrix is unambiguous.\n",
    "plt.ylabel(\"Actual\")\n",
    "plt.title(\"Confusion Matrix for streetcar delay prediction (weighted)\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Summary\n",
    "This notebook shows methods for dealing with structured data in the context of a simple XGBoost model.\n",
    "\n",
    "# Author\n",
    "\n",
    "Mark Ryan is a manager at Intact Insurance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "try_tf2",
   "language": "python",
   "name": "try_tf2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
